clear all
cd "C:\Users\ot3\Documents\TeacherDeseg\OCRandSERS\"


*****************************************
***Import and Clean Data for Each Year***
*****************************************
*First import data from S Reber via SERS on the number of black studnets in desegregated schools for each year in the pre-1964 period
*AL
foreach y of numlist 63 64 {
clear
insheet using Pre64_SERS\AL\sers`y'-al.txt, double
tostring oedist,  replace
gen oedist2 = string(real(oedist),"%04.0f")
g distcode=("100"+oedist2)
destring distcode, replace
g frac_stud_deseg= desegenrblack / totenrblack
keep year frac_stud_deseg distcode distname
save AL`y', replace
}

*GA
foreach y of numlist 61 62 63 64 {
clear
insheet using Pre64_SERS\GA\sers`y'-ga.txt, double
tostring oedist,  replace
gen oedist2 = string(real(oedist),"%04.0f")
g distcode=("200"+oedist2)
destring distcode, replace
g frac_stud_deseg= desegenrblack / totenrblack
keep year frac_stud_deseg distcode distname
save GA`y', replace
}

*LA
foreach y of numlist 60 61 62 63 64 {
clear
insheet using Pre64_SERS\LA\sers`y'-la.txt, double
tostring oedist,  replace
gen oedist2 = string(real(oedist),"%04.0f")
g distcode=("280"+oedist2)
destring distcode, replace
g frac_stud_deseg= desegenrblack / totenrblack
keep year frac_stud_deseg distcode distname
save LA`y', replace
}

*MS
foreach y of numlist 64 {
clear
insheet using Pre64_SERS\MS\sers`y'-ms.txt, double
tostring oedist,  replace
gen oedist2 = string(real(oedist),"%04.0f")
g distcode=("340"+oedist2)
destring distcode, replace
g frac_stud_deseg= desegenrblack / totenrblack
keep year frac_stud_deseg distcode distname
save MS`y', replace
}

*SC
foreach y of numlist 63 64 {
clear
insheet using Pre64_SERS\SC\sers`y'-sc.txt, double
tostring oedist,  replace
gen oedist2 = string(real(oedist),"%04.0f")
g distcode=("500"+oedist2)
destring distcode, replace
g frac_stud_deseg= desegenrblack / totenrblack
keep year frac_stud_deseg distcode distname
save SC`y', replace
}

*TN
foreach y of numlist 60 61 62 63 64 {
clear
insheet using Pre64_SERS\TN\sers`y'-tn.txt, double
tostring oedist,  replace
gen oedist2 = string(real(oedist),"%04.0f")
g distcode=("520"+oedist2)
destring distcode, replace
g frac_stud_deseg= desegenrblack / totenrblack
keep year frac_stud_deseg distcode distname
save TN`y', replace
}

*VA
foreach y of numlist 60 61 62 63 64 {
clear
insheet using Pre64_SERS\VA\sers`y'-va.txt, double
tostring oedist,  replace
gen oedist2 = string(real(oedist),"%04.0f")
g distcode=("560"+oedist2)
destring distcode, replace
g frac_stud_deseg= desegenrblack / totenrblack
keep year frac_stud_deseg distcode distname
save VA`y', replace
}

*append them all
clear
local states="AL GA LA MS SC TN VA"
foreach s of local states {
foreach y of numlist 60(1)64 {
cap append using `s'`y'
}
}
sort distcode year
duplicates drop distcode year, force
save SERS_Pre1964, replace



*Now import and clean hand-entered data from print reports state by state. These are *district* level counts of teachers and students by race
*VA
import excel "C:\Users\ot3\Documents\TeacherDeseg\OCRandSERS\VA_HandEnter.xlsx", sheet("Sheet1") firstrow clear
drop white_teach1959 black_teach1959 white_stud1959 black_stud1959 //entered 1959, but don't need them 
local vars "white_teach black_teach white_stud black_stud"
foreach x of local vars { 
egen obscnt`x'=rownonmiss(`x'*)
}
keep if obscntwhite_teach==5 & obscntblack_teach==5 & obscntwhite_stud==5 & obscntblack_stud==5
drop obscnt*
save VAclean, replace 


*MS
import excel "C:\Users\ot3\Documents\TeacherDeseg\OCRandSERS\MS_HandEnter.xlsx", sheet("Sheet1") firstrow clear
destring white_teach1960, force replace
local vars "white_teach black_teach white_stud black_stud"

foreach x of local vars { 
egen obscnt`x'=rownonmiss(`x'*)
}
keep if obscntwhite_teach==3 & obscntblack_teach==3 & obscntwhite_stud==3 & obscntblack_stud==3
drop obscnt*

foreach x of local vars { 
g `x'1963=`x'1962+(`x'1962-`x'1961)
g `x'1964=`x'1962+2*(`x'1962-`x'1961)
}

save MSclean, replace 


*LA
import excel "C:\Users\ot3\Documents\TeacherDeseg\OCRandSERS\LA_HandEnter.xlsx", sheet("Sheet1") firstrow clear
local vars "white_teach black_teach white_stud black_stud"
foreach x of local vars { 
egen obscnt`x'=rownonmiss(`x'*)
}
keep if obscntwhite_teach==5 & obscntblack_teach==5 & obscntwhite_stud==5 & obscntblack_stud==5
drop obscnt*
save LAclean, replace 


*TX
import excel "C:\Users\ot3\Documents\TeacherDeseg\OCRandSERS\TX_HandEnter_1964.xlsx", sheet("Sheet1") firstrow clear //1964 stored in seperate file, merge it with 1960 and 1962 here
save TX1964, replace
import excel "C:\Users\ot3\Documents\TeacherDeseg\OCRandSERS\TX_HandEnter.xlsx", sheet("Sheet1") firstrow clear
merge 1:1 distcode using TX1964, nogen
local vars "white_teach black_teach white_stud black_stud"

foreach x of local vars { 
egen obscnt`x'=rownonmiss(`x'*)
}
keep if obscntwhite_teach==3 & obscntblack_teach==3 & obscntwhite_stud==3 & obscntblack_stud==3
drop obscnt*

foreach x of local vars { 
g `x'1961=`x'1960+.5*(`x'1962-`x'1960)
g `x'1963=`x'1962+.5*(`x'1964-`x'1962)
}
save TXclean, replace 

*SC
import excel "C:\Users\ot3\Documents\TeacherDeseg\OCRandSERS\SC_HandEnter.xlsx", sheet("Sheet1") firstrow clear
local vars "white_teach black_teach white_stud black_stud"

foreach x of local vars { 
egen obscnt`x'=rownonmiss(`x'*)
}
keep if obscntwhite_teach==3 & obscntblack_teach==3 & obscntwhite_stud==3 & obscntblack_stud==3
drop obscnt*

foreach x of local vars { 
g `x'1964=`x'1963+.5*(`x'1963-`x'1961) //mean change from 1961-1963, the two most recent observed years
g `x'1962=`x'1961+(`x'1961-`x'1960)
}
save SCclean, replace 

*TN
import excel "C:\Users\ot3\Documents\TeacherDeseg\OCRandSERS\TN_HandEnter.xlsx", sheet("Sheet1") firstrow clear
local vars "white_teach black_teach white_stud black_stud"

foreach x of local vars { 
egen obscnt`x'=rownonmiss(`x'*)
}
keep if obscntwhite_teach==3 & obscntblack_teach==3 & obscntwhite_stud==3 & obscntblack_stud==3
drop obscnt*

foreach x of local vars { 
g `x'1963=`x'1962+(`x'1962-`x'1961)
g `x'1964=`x'1962+2*(`x'1962-`x'1961)
}

save TNclean, replace 

*AL
import excel "C:\Users\ot3\Documents\TeacherDeseg\OCRandSERS\AL_HandEnter.xlsx", sheet("Sheet1") firstrow clear
local vars "white_teach black_teach white_stud black_stud"

foreach x of local vars { 
egen obscnt`x'=rownonmiss(`x'*)
}
keep if obscntwhite_teach==4 & obscntblack_teach==4 & obscntwhite_stud==4 & obscntblack_stud==4
drop obscnt*

foreach x of local vars { 
g `x'1964=`x'1963+(`x'1963-`x'1962)
}
save ALclean, replace 

*GA
import excel "C:\Users\ot3\Documents\TeacherDeseg\OCRandSERS\GA_HandEnter.xlsx", sheet("Sheet1") firstrow clear 
local vars "white_teach black_teach white_stud black_stud"

foreach x of local vars { 
egen obscnt`x'=rownonmiss(`x'*)
}
keep if obscntwhite_teach==3 & obscntblack_teach==3 & obscntwhite_stud==3 & obscntblack_stud==3
drop obscnt*

foreach x of local vars { 
g `x'1960=(`x'1959+`x'1961)/2
g `x'1962=(`x'1961+`x'1963)/2
g `x'1964=`x'1963+.5*(`x'1963-`x'1961)
}

drop *1959
save GAclean, replace 

*append data from each state for 1960-1964 together
use VAclean, clear,
append using MSclean
append using LAclean
append using TXclean
append using SCclean
append using TNclean
append using ALclean
append using GAclean


drop if black_teach1960==0 & black_teach1961==0 & black_teach1962==0 & black_teach1963==0 & black_teach1964==0

reshape long white_teach black_teach white_stud black_stud, i(distcode) j(year) 

foreach x of varlist white_teach black_teach white_stud black_stud { 
replace `x'=0 if `x'<0 //in handful of cases the linear extrapolaitons create negative numbers of students or teachers
}

rename black_stud black_stud_cnt 
rename white_stud white_stud_cnt
rename white_teach white_teach_cnt 
rename black_teach black_teach_cnt

egen stud_cnt=rowtotal(black_stud_cnt white_stud_cnt), missing
egen teach_cnt=rowtotal(white_teach_cnt black_teach_cnt), missing
g frac_stud_black=black_stud_cnt/stud_cnt
g frac_teach_black=black_teach_cnt/teach_cnt


*merge on student desegreagtion data from pre-1964
merge 1:1 distcode year using SERS_Pre1964
drop if _merge==2
drop _merge


*Use SERS state averages for Texas, which does not have district level desgregation data pre-1964
replace frac_stud_deseg=.0726 if OCRstate==44 & year==1964 //TX
replace frac_stud_deseg=.0429 if OCRstate==44 & year==1963 //TX
replace frac_stud_deseg=.016 if OCRstate==44 & year==1962 //TX
replace frac_stud_deseg=.0142 if OCRstate==44 & year==1961 //TX
replace frac_stud_deseg=.0121 if OCRstate==44 & year==1960 //TX

*Set student desegreagtion as zero when unavailable
replace frac_stud_deseg=0 if frac_stud_deseg==. & OCRstate!=44

g exp_index=1  
g dis_index=1
save teachers1964, replace


*Import and clean hand-entered data for 1967, which is *school* level 
import excel "C:\Users\ot3\Documents\TeacherDeseg\HandEntered1967.xlsx", sheet("Sheet1") firstrow clear

drop if white_stud_cnt==. & black_stud_cnt==. & white_teach_cnt==. & black_teach_cnt==. //these are districts not in the 1967 data at all
replace white_stud_cnt=0 if white_stud_cnt==. //The book just has blanks when a school had zero black/white students/teachers. Usually my RA entered a zero, but these are a few cases where he left it blank instead
replace black_stud_cnt=0 if black_stud_cnt==.
replace white_teach_cnt=0 if white_teach_cnt==.
replace white_teach_cnt=0 if white_teach_cnt==.

drop if black_stud_cnt==0 & white_stud_cnt==0
drop if black_teach_cnt==0 & white_teach_cnt==0

*calculate school level characteristics	
g pctw=white_stud_cnt/(black_stud_cnt+white_stud_cnt)
g pctb=black_stud_cnt/(black_stud_cnt+white_stud_cnt)
g school_ds=black_stud_cnt>0 & pctw>.05 
g black_stud_dscnt=0 if school_ds==0
replace black_stud_dscnt=black_stud_cnt if school_ds==1
g black_teach_dscnt=0 if school_ds==0
replace black_teach_dscnt=black_teach_cnt if school_ds==1
 
 *calculate explation and dissimilarity indices
bysort distcode: egen black_dtot=total(black_stud_cnt)
bysort distcode: egen white_dtot=total(white_stud_cnt)
g exp_index=(black_stud_cnt/black_dtot)*(black_stud_cnt/(black_stud_cnt+white_stud_cnt))
g dis_index=abs((black_stud_cnt/black_dtot)-(white_stud_cnt/white_dtot))

*collpase to district level
collapse (sum) white_stud_cnt black_stud_cnt black_stud_dscnt white_teach_cnt black_teach_cnt black_teach_dscnt exp_index dis_index (first) OCRstate  distname (count) school_cnt=black_stud_cnt, by(distcode)
replace dis_index=.5*dis_index

*generate working variables
egen stud_cnt=rowtotal(black_stud_cnt white_stud_cnt)
egen teach_cnt=rowtotal(white_teach_cnt black_teach_cnt)
g frac_stud_black=black_stud_cnt/stud_cnt
g frac_teach_black=black_teach_cnt/teach_cnt
g frac_stud_deseg=black_stud_dscnt/black_stud_cnt
g frac_teach_deseg=black_teach_dscnt/black_teach_cnt

g year=1967
save teachers1967, replace


*Import and clean 1968 data from OCR via S Reber, which is school level
import delimited OCR68.txt, clear 
keep if v1==2 //drops district-level data, retaining schools
 rename v7 distcode
 rename v8 distname
 rename v178 OCRstate
 rename v181 OCRcnty
 rename v113 black_stud_cnt
 rename v116 white_stud_cnt
 rename v137 black_teach_cnt
 rename v140 white_teach_cnt
 drop v*
 drop if black_stud_cnt==0 & white_stud_cnt==0
 drop if black_teach_cnt==0 & white_teach_cnt==0

*calculate school level characteristics
replace white_stud_cnt=0 if white_stud_cnt==.
g pctw=white_stud_cnt/(black_stud_cnt+white_stud_cnt)
g pctb=white_stud_cnt/(black_stud_cnt+white_stud_cnt)
g school_ds=black_stud_cnt>0 & pctw>.05 & black_stud_cnt!=. & white_stud_cnt!=.
g black_stud_dscnt=0 if school_ds==0
replace black_stud_dscnt=black_stud_cnt if school_ds==1
g black_teach_dscnt=0 if school_ds==0
replace black_teach_dscnt=black_teach_cnt if school_ds==1

*calculate exposure and dissimilarity indices
bysort distcode: egen black_dtot=total(black_stud_cnt)
bysort distcode: egen white_dtot=total(white_stud_cnt)
g exp_index=(black_stud_cnt/black_dtot)*(black_stud_cnt/(black_stud_cnt+white_stud_cnt))
g dis_index=abs((black_stud_cnt/black_dtot)-(white_stud_cnt/white_dtot))

*collapse to district level
collapse (sum) white_stud_cnt black_stud_cnt black_stud_dscnt white_teach_cnt black_teach_cnt black_teach_dscnt exp_index dis_index (first) OCRstate  OCRcnty distname (count) school_cnt=black_stud_cnt, by(distcode)
replace dis_index=.5*dis_index

*calculate fraction of students/teachers in desegregated schools
egen stud_cnt=rowtotal(black_stud_cnt white_stud_cnt)
egen teach_cnt=rowtotal(white_teach_cnt black_teach_cnt)
g frac_stud_black=black_stud_cnt/stud_cnt
g frac_teach_black=black_teach_cnt/teach_cnt
g frac_stud_deseg=black_stud_dscnt/black_stud_cnt
g frac_teach_deseg=black_teach_dscnt/black_teach_cnt

g year=1968
save teachers1968, replace


*Import and clean 1970 data from OCR via S Reber, which is school level
import delimited OCR70.txt, clear 
keep if v1==2 //drops district-level data, retaining schools
 rename v7 distcode
 rename v178 OCRstate	
 rename v113 black_stud_cnt
 rename v116 white_stud_cnt
 rename v137 black_teach_cnt
 rename v140 white_teach_cnt
 drop v*
 drop if black_stud_cnt==0 & white_stud_cnt==0
 drop if black_teach_cnt==0 & white_teach_cnt==0
 
*calculate school level characteristics
g pctw=white_stud_cnt/(black_stud_cnt+white_stud_cnt)
g school_ds=black_stud_cnt>0 & pctw>.05 & black_stud_cnt!=. & white_stud_cnt!=.
g black_stud_dscnt=0 if school_ds==0
replace black_stud_dscnt=black_stud_cnt if school_ds==1
g black_teach_dscnt=0 if school_ds==0
replace black_teach_dscnt=black_teach_cnt if school_ds==1

*calculate explation and dissimilarity indices
bysort distcode: egen black_dtot=total(black_stud_cnt)
bysort distcode: egen white_dtot=total(white_stud_cnt)
g exp_index=(black_stud_cnt/black_dtot)*(black_stud_cnt/(black_stud_cnt+white_stud_cnt))
g dis_index=abs((black_stud_cnt/black_dtot)-(white_stud_cnt/white_dtot))

*collapse to district level
collapse (sum) white_stud_cnt black_stud_cnt black_stud_dscnt white_teach_cnt black_teach_cnt black_teach_dscnt exp_index dis_index (first) OCRstate (count) school_cnt=black_stud_cnt, by(distcode)
replace dis_index=.5*dis_index

*generate working variables
egen stud_cnt=rowtotal(black_stud_cnt white_stud_cnt)
egen teach_cnt=rowtotal(white_teach_cnt black_teach_cnt)
g frac_stud_black=black_stud_cnt/stud_cnt
g frac_teach_black=black_teach_cnt/teach_cnt
g frac_stud_deseg=black_stud_dscnt/black_stud_cnt
g frac_teach_deseg=black_teach_dscnt/black_teach_cnt

g year=1970
save teachers1970, replace


*Import and clean 1972 data from OCR via S Reber, which is school level
import delimited OCR72.txt, clear 
keep if v1==2 //drops district-level data, retaining schools
 rename v7 distcode 
 rename v178 OCRstate	
 rename v113 black_stud_cnt
 rename v116 white_stud_cnt
 rename v137 black_teach_cnt
 rename v140 white_teach_cnt
 drop v*
 drop if black_stud_cnt==0 & white_stud_cnt==0
 drop if black_teach_cnt==0 & white_teach_cnt==0
 
*calculate school level characteristics
g pctw=white_stud_cnt/(black_stud_cnt+white_stud_cnt)
g school_ds=black_stud_cnt>0 & pctw>.05 & black_stud_cnt!=. & white_stud_cnt!=.
g black_stud_dscnt=0 if school_ds==0
replace black_stud_dscnt=black_stud_cnt if school_ds==1
g black_teach_dscnt=0 if school_ds==0
replace black_teach_dscnt=black_teach_cnt if school_ds==1

*calculate explation and dissimilarity indices
bysort distcode: egen black_dtot=total(black_stud_cnt)
bysort distcode: egen white_dtot=total(white_stud_cnt)
g exp_index=(black_stud_cnt/black_dtot)*(black_stud_cnt/(black_stud_cnt+white_stud_cnt))
g dis_index=abs((black_stud_cnt/black_dtot)-(white_stud_cnt/white_dtot))

*collapse to district level
collapse (sum) white_stud_cnt black_stud_cnt black_stud_dscnt white_teach_cnt black_teach_cnt black_teach_dscnt exp_index dis_index (first) OCRstate (count) school_cnt=black_stud_cnt, by(distcode)
replace dis_index=.5*dis_index

*generate working variables
egen stud_cnt=rowtotal(black_stud_cnt white_stud_cnt)
egen teach_cnt=rowtotal(white_teach_cnt black_teach_cnt)
g frac_stud_black=black_stud_cnt/stud_cnt
g frac_teach_black=black_teach_cnt/teach_cnt
g frac_stud_deseg=black_stud_dscnt/black_stud_cnt
g frac_teach_deseg=black_teach_dscnt/black_teach_cnt

g year=1972
save teachers1972, replace

****************************************************
***Merge/Append Annual Data Into Working Data Set***
****************************************************
*Append 1964, 1967, 1968, 1970 and 1972 data for districts in all 8 states with 1964 data available and create working 8-state data set
use teachers1972, clear
append using teachers1970
append using teachers1968
append using teachers1967
append using teachers1964

*restrict by state
keep if inlist(OCRstate,1,11,19,25,41, 43,44,47)		

*Merge on FIPS codes (OCR uses its own geographic codes)
bysort distcode: egen OCRcounty=max(OCRcnty)
merge m:1 OCRstate OCRcounty using OCR_FIPS_xwalk
drop if _merge==2
drop OCRcnty _merge


*Generate baseline (1964) levels of different variables for heterogeneity analysis, weighting
foreach x of varlist frac_teach_black black_teach_cnt black_stud_cnt stud_cnt {
g `x'64=`x' if year==1964
bysort distcode: egen `x'1964=max(`x'64)
drop `x'64
}
g bshare1964=black_stud_cnt1964/ stud_cnt1964


*Take logs, adding 1 to preserve zeros where needed
g lblackteach=ln(black_teach_cnt+1)
g lwhiteteach=ln(white_teach_cnt+1)
g lteach=ln(teach_cnt+1)
g lstud_cnt=ln(stud_cnt+1)
g lschool_cnt=ln(school_cnt)
g stratio=stud_cnt/ teach_cnt
g lstratio=ln(stratio)

*Keep districts where primary variables were observed in all 9 years. 
foreach x of varlist lblackteach frac_stud_deseg {
bysort distcode: egen obscnt=count(`x') 
keep if obscnt==9
drop obscnt
}

*tag districts with no black teachers in a year other than 1964
g tmpzbt=black_teach_cnt==0
bysort distcode: egen zero_black_teach=max(tmpzbt) 
drop tmpzbt


*merge on school finance data from Cascio et al.2013. First construct NCES district IDs for merging
rename statefip fipst
tostring fipst,  replace
tostring distcode,  replace
g distcode2=substr(distcode,-5,.)
g ncesid=fipst+distcode2
destring ncesid, replace
destring distcode, replace
destring fipst, replace
destring countyfip, replace
drop  distcode2 


*Merge reorganization indicator
merge m:1 ncesid using reorgdata
drop if _merge==2
drop _merge

*merge on county-level war on poverty data
tostring fipst, format(%02.0f) replace
tostring countyfip, format(%03.0f) replace
g fips=fipst+countyfip
destring fips, replace ignore(".")

merge m:1 fips year using countypolicies 
drop if _merge==2
drop _merge

foreach x of varlist tranpcmcaid tranpcfsp tranpcssi tranpcmcare tranpcafdc tranpcga hsfund {
replace `x'=`x'/1000
}


save teachers_8state, replace





*******************************************
***Create 11 state sample for robustness***
*******************************************
*Append 1967, 1968, 1970 and 1972 data for districts in all 11 states of former confederacy and create working 11-state data set
use teachers1972, clear
append using teachers1970
append using teachers1968
append using teachers1967

*restrict by state
keep if inlist(OCRstate,1,4,10,11,19,25,34,41, 43, 44,47)		//AL, AR, FL, GA, LA, MS, NC , SC, TN, TX, VA	

*Merge on FIPS codes (OCR uses its own geographic codes)
bysort distcode: egen OCRcounty=max(OCRcnty)
merge m:1 OCRstate OCRcounty using OCR_FIPS_xwalk
drop if _merge==2
drop OCRcnty _merge


*Take logs of teacher counts
g lblackteach=ln(black_teach_cnt+1)
g lteach=ln(teach_cnt+1)

*Keep districts observed in all 4 years
foreach x of varlist lblackteach frac_stud_deseg {
bysort distcode: egen obscnt=count(`x') 
keep if obscnt==4
drop obscnt
}

save teachers_11state, replace







